/*

Purpose: Generate plot comparing response functions of main and no precipitation
models for high and low income groups. Produces Appendix Figure D7

These plots are produce by dividing the estimating sample into two groups on
either side of median income. Predicted response functions at the mean of covariates
within each bin are plotted using the regression coefficients estimated in
`age_spec_interacted_regressions.do` and `noprecip_regressions.do`

Inputs
------

- `0_data_cleaning/3_final/global_mortality_panel` - Final mortality panel.

- `data/1_estimation/1_ster/age_spec_interacted`
    - `Agespec_interaction_response.ster` - Ster file containing results from
    an age-stacked regression interacted with ADM1 average income and climate.
    - `Agespec_interaction_response_noprecip.ster` - Ster file containing results
    from an age-stacked regression interacted with ADM1 climate and time variant income.


Outputs
-------

- `output/1_estimation/figures/5_response_func_compare/noprecip`
    - `Age*_response_compare_noprecip.pdf` - 2 x 1 array with the
    showing the highest and lowest income tercile response functions for the
    main and noprecip models.
    - `Age*_response_compare_noprecip_dif.pdf` - 2 x 2 array showing the difference
    between the response functions of the alternative and main models at each 
    degree by income group, as well as histograms plotting the population - weighted 
    mean of days in the year falling in that bin for that income group.

*/

*************************************************************************
*                           PART A. Initializing                        *
*************************************************************************

clear all
set more off
set matsize 10000
set maxvar 32700

set scheme s1color


if "$REPO" == "" {
    global REPO: env REPO
    global DB: env DB 
    global OUTPUT: env OUTPUT 

    do "$REPO/carleton_mortality_2022/0_data_cleaning/1_utils/set_paths.do"
}


local STER "$ster_dir"
local OUTPUT "$output_dir/figures/Figure_D7_noprecip"
local DATA "$data_dir/3_final"


*************************************************************************
*                           PART B. Toggles                         *
*************************************************************************

* trim response function to only be evaluated over middle 99% of daily temp distribution
local trimrf = 0

* do you want a secondary Y axis on the dif plots which is the % of the response funtion at 35 degrees?
loc secaxis = 0

* number of panels
local panels = 2

* xtile income groups (takes lowest and highest)
local g = 3

* polynomial
local o = 4

* set baseline temp
local omit = 20

* x-range for plotting
local x_min = -5
local x_max = 40
local x_int = 10


*************************************************************************
*                           PART C. Prepare Dataset                     *
*************************************************************************

* Prepare data for regressions.
use "$data_dir/3_final/global_mortality_panel_covariates", clear

* sort the panel
sort adm1_code agegroup year


*create var for income tercile
preserve
    collapse (mean) loggdppc_adm1_avg, by(adm1_code year)
    xtile ytile = loggdppc_adm1_avg, nq(`g')
    keep adm1_code year ytile
    tempfile tercile
    save "`tercile'", replace
restore

merge m:1 adm1_code year using "`tercile'", nogen

tab ytile


*----------------------------------
* aggregating temperature bins
*----------------------------------

forval y = 1/3 {

    preserve

    keep if ytile == `y'
    di _N

    bysort year agegroup: egen tot_pop = total(population)
    gen weight = population / tot_pop


    * save weighted covariate means for prediction
    foreach var of varlist loggdppc_adm1_avg lr_tavg_GMFD_adm1_avg {

        sum `var' [aw = weight]
        loc zvalue_`var'_`y' = r(mean)
    }


    * create empty global array of values
    global values

    * make list of all bin variables
    ds tavg_bins*GMFD

    foreach var in `r(varlist)' {
        * take weighted avg of bin and save
        sum `var' [aw = weight] 
        global values ${values} `r(mean)' 
    }

    * drop all and set new length to how many bins there are
    drop _all
    local num : list sizeof global(values)
    set obs `num'

    gen value = .
    gen bin = .

    forval i = 1/`num' {
        local thisval : word `i' of ${values}
        replace value = `thisval' if _n == `i'
    }

    * calculate cumulative days
    gen sumval = sum(value)
    * ensure it equals 365
    sum sumval
    loc daymax = r(max)

    replace bin = _n
    replace bin = bin - 41

    * save min bin number if we are dropping bottom 0.5% of temp days ( < 3.65 in cumulative days)
    count if sumval < (`daymax' * .005)
    loc p1temp_`y' = r(N) - 41

    * save min bin number if we are dropping top 0.5% of temp days ( > 361.35 in cumulative days)
    count if sumval > (`daymax' * .995)
    loc p99temp_`y' = _N - r(N) - 41

    drop if bin <= `x_min'
    drop if bin >= `x_max'

    *---------------------------------------------
    * Plot t dist histograms
    *---------------------------------------------

    * set lines for 99% t destribution 
    if `p1temp_`y'' > `x_min'    loc xdashmin "xline(`p1temp_`y'', lcolor(navy) lwidth(vthin) lpattern(shortdash))"
    else                            loc xdashmin ""

    if `p99temp_`y'' < `x_max'   loc xdashmax "xline(`p99temp_`y'', lcolor(navy) lwidth(vthin) lpattern(shortdash))"
    else                            loc xdashmax ""
    

    if `secaxis' == 1 {

        graph tw bar value bin, color(navy) yaxis(1) || bar value bin, color(navy) yaxis(2)  /// 
        ytitle("exposure (days)", size(small) axis(1)) ytitle(" ", size(small) axis(2)) ///
        xtitle("Daily temperature (C)", size(small)) fysize(40) xlabel(`x_min'(`x_int')`x_max', labsize(small)) ///
        ylabel(0(10)30, labsize(small) axis(1)) ylabel(0(10)30, labsize(small) axis(2)) ///
        legend(off) name(hist_`y') `xdashmin' `xdashmax'
    }

    else {

        graph tw bar value bin, color(navy) name(hist_`y') /// 
        ytitle("exposure (days)", size(small)) xtitle("Daily temperature (C)", size(small)) `xdashmin' `xdashmax' fysize(40) ///
        xlabel(`x_min'(`x_int')`x_max', labsize(small)) ylabel(0(10)30, labsize(small)) ///
        text(25 5 "middle 99% temp distribution", size(small))
    }

    restore
}


*************************************************************************
*                       PART D. Generate Plots                          *
*************************************************************************

*----------------------------------
*generating age-specific plots
*----------------------------------

*create obs of 1 degree range from min to max
local min = `x_min'
local max = `x_max'
local obs = `max' - `min' + 1

drop if _n > 0
set obs `obs'
replace tavg_poly_1_GMFD = _n + `min' - 1


foreach age of numlist 1/3 {

    forval y = 1/3 {

        *------------------------------------
        *Generate predictions for main model
        *------------------------------------


        *main model estimates
        estimate use "`STER'/age_spec_interacted/Agespec_interaction_response.ster"


        *uninteracted terms
        local line = "_b[`age'.agegroup#c.tavg_poly_1_GMFD]*(tavg_poly_1_GMFD-`omit')"
        foreach k of num 2/`o' {
            local add = "+ _b[`age'.agegroup#c.tavg_poly_`k'_GMFD]*(tavg_poly_1_GMFD^`k' - `omit'^`k')"
            local line "`line' `add'"
            }

        *lgdppc and Tmean at the tercile mean
        foreach var in "loggdppc_adm1_avg" "lr_tavg_GMFD_adm1_avg" {
            loc z = `zvalue_`var'_`y''
            foreach k of num 1/`o' {
                local add = "+ _b[`age'.agegroup#c.`var'#c.tavg_poly_`k'_GMFD] * (tavg_poly_1_GMFD^`k' - `omit'^`k') * `z'"
                local line "`line' `add'"
                }
            }


        di "`line'"
        predictnl yhat_main_`y'_`age' = `line', se(se_main_`y'_`age') ci(lowerci_main_`y'_`age' upperci_main_`y'_`age')


        * save response function level at 35 degrees
        loc rf35_`y'_`age' = yhat_main_`y'_`age'[41]



        *----------------------------------
        *Generate predictions for alternative model
        *----------------------------------


        *noprecip model estimates
        estimate use "`STER'/diagnostic_specs/Agespec_interaction_response_noprecip.ster"

        *uninteracted terms
        local line = "_b[`age'.agegroup#c.tavg_poly_1_GMFD]*(tavg_poly_1_GMFD-`omit')"
        foreach k of num 2/`o' {
            local add = "+ _b[`age'.agegroup#c.tavg_poly_`k'_GMFD]*(tavg_poly_1_GMFD^`k' - `omit'^`k')"
            local line "`line' `add'"
            }

        *lgdppc and Tmean at the tercile mean
        foreach var in "loggdppc_adm1_avg" "lr_tavg_GMFD_adm1_avg" {
            loc z = `zvalue_`var'_`y''
            foreach k of num 1/`o' {
                local add = "+ _b[`age'.agegroup#c.`var'#c.tavg_poly_`k'_GMFD] * (tavg_poly_1_GMFD^`k' - `omit'^`k') * `z'"
                local line "`line' `add'"
                }
            }

        di "`line'"
        predictnl yhat_alt_`y'_`age' = `line', se(se_alt_`y'_`age') ci(lowerci_alt_`y'_`age' upperci_alt_`y'_`age')


        *----------------------------------
        * Calculate difference
        *----------------------------------

        * generate difference between yhat_main and yhat_alt
        gen yhat_dif_`y'_`age' = yhat_alt_`y'_`age' - yhat_main_`y'_`age'


        * generate series as a % of 35 degree RF level
         gen yhat_dif35_`y'_`age' = yhat_dif_`y'_`age' * 100 / abs(`rf35_`y'_`age'')



        * -----------------------------------------------------------------------
        * Identify y min and max (because ycommon won't work with histograms below)
        * -----------------------------------------------------------------------

        preserve

        if `trimrf' == 1 {
            drop if tavg_poly_1_GMFD < `p1temp_`y''
            drop if tavg_poly_1_GMFD > `p99temp_`y''
        }

        * set limits for diff chart (no CIs)
        sum yhat_dif_`y'_`age'
        loc yl_max_`y'_`age' = r(max)
        loc yl_min_`y'_`age' = r(min)

        * set upper limit for levels chart (CIs)
        sum upperci_main_`y'_`age'
        loc yc_main_max_`y'_`age' = r(max)

        sum upperci_alt_`y'_`age'
        loc yc_alt_max_`y'_`age' = r(max)

        loc yc_max_`y'_`age' = max(`yc_main_max_`y'_`age'', `yc_alt_max_`y'_`age'')


        * set lower limit for levels chart (CIs)
        sum lowerci_main_`y'_`age'
        loc yc_main_min_`y'_`age' = r(min)

        sum lowerci_alt_`y'_`age'
        loc yc_alt_min_`y'_`age' = r(min)

        loc yc_min_`y'_`age' = min(`yc_main_min_`y'_`age'', `yc_alt_min_`y'_`age'')

        restore
    }

    *----------------------------------
    * Set common Y axes for charts where ycommon won't work (because of hists)
    *----------------------------------

    * c is for CI, l is for line
    foreach s in "l" "c" {

        * identify total ymin and ymax
        loc y`s'_max_`age' = max(`y`s'_max_1_`age'', `y`s'_max_3_`age'')
        loc y`s'_min_`age' = min(`y`s'_min_1_`age'', `y`s'_min_3_`age'')


        loc y`s'range_`age' = `y`s'_max_`age'' - `y`s'_min_`age''
        di "range:"
        di `y`s'range_`age''


        if `y`s'range_`age'' < 2 {
            loc y`s'_max_`age' = round(`y`s'_max_`age''+.1, .2)
            loc y`s'_min_`age' = round(`y`s'_min_`age''-.1, .2)
            loc y`s'_int_`age' = .2
        }

        else if `y`s'range_`age'' > 2 & `y`s'range_`age'' < 8 {
            loc y`s'_max_`age' = ceil(`y`s'_max_`age'')
            loc y`s'_min_`age' = floor(`y`s'_min_`age'')
            loc y`s'_int_`age' = 1
        }

        else if `y`s'range_`age'' > 8 & `y`s'range_`age'' < 20 {
            loc y`s'_max_`age' = round(ceil(`y`s'_max_`age''+1), 2)
            loc y`s'_min_`age' = round(floor(`y`s'_min_`age''-1), 2)
            loc y`s'_int_`age' = 2
        }

        else if `y`s'range_`age'' > 20 & `y`s'range_`age'' < 50 {
            loc y`s'_max_`age' = round(ceil(`y`s'_max_`age''+2.5), 5)
            loc y`s'_min_`age' = round(floor(`y`s'_min_`age''-2.5), 5)
            loc y`s'_int_`age' = 5
        }

        else {
            loc y`s'_max_`age' = round(ceil(`y`s'_max_`age''+5), 10)
            loc y`s'_min_`age' = round(floor(`y`s'_min_`age''-5), 10)
            loc y`s'_int_`age' = 10   
        }

        di "set axes"
    }


    *************************************************************************
    *                       PART G. Generate Plots                          *
    *************************************************************************

    forval y = 1/3 {

        preserve

        * trim y hat if trimrf == 1
        if `trimrf' == 1 {
            drop if tavg_poly_1_GMFD < `p1temp_`y''
            drop if tavg_poly_1_GMFD > `p99temp_`y''
        }

        * set vertical lines for middle 99% of temperature days 
        if `p1temp_`y'' > `x_min'    loc xdashmin "xline(`p1temp_`y'', lcolor(navy) lwidth(vthin) lpattern(shortdash))"
        else                            loc xdashmin ""

        if `p99temp_`y'' < `x_max'   loc xdashmax "xline(`p99temp_`y'', lcolor(navy) lwidth(vthin) lpattern(shortdash))"
        else                            loc xdashmax ""


        * label chart titles
        if `y' == 1 loc inctit "Low"
        if `y' == 3 loc inctit "High"


        * set secondary y axis which is % of 35 degree RF
        loc y2_int_`y'_`age' = ceil(`yl_int_`age''*100/abs(`rf35_`y'_`age''))
        loc y2_max_`y'_`age' = round(`yl_max_`age''*100/abs(`rf35_`y'_`age''), `y2_int_`y'_`age'')
        loc y2_min_`y'_`age' = round(`yl_min_`age''*100/abs(`rf35_`y'_`age''), `y2_int_`y'_`age'')


        * set y axes for line chart, CI chart, and secondary axis
        loc yllab   "`yl_min_`age''(`yl_int_`age'')`yl_max_`age''"
        loc yclab   "`yc_min_`age''(`yc_int_`age'')`yc_max_`age''"
        loc y2lab   "`y2_min_`y'_`age''(`y2_int_`y'_`age'')`y2_max_`y'_`age''"  


        *---------------------------------------------------
        * Plot 2x1 chart showing 2 response functions and CIs
        *---------------------------------------------------

        graph tw line yhat_main_`y'_`age' yhat_alt_`y'_`age' tavg_poly_1_GMFD, lc(navy red) lwidth(medthin medthin) lpattern(solid shortdash) ///
        || rarea lowerci_main_`y'_`age' upperci_main_`y'_`age' tavg_poly_1_GMFD, col(navy%15) lwidth(none) ///
        || rarea lowerci_alt_`y'_`age' upperci_alt_`y'_`age' tavg_poly_1_GMFD, col(red%15) lwidth(none) ///
        , legend(order(1 "main model" 2 "no precip model") size(small) rows(1) rowgap(tiny)) ///
        yline(0, lcolor(gs5) lwidth(vthin)) title("`inctit' Income", size(medsmall)) `xdashmin' `xdashmax' ///
        ylabel(`yclab', labsize(small)) ytitle("mortality response", size(small)) ///
        xlabel(`x_min'(`x_int')`x_max', labsize(small)) xtitle("") name(g`y'_`age') fysize(80)

        di "saved graph 1"


        *---------------------------------------------
        * Plot chart showing diff in response functions
        *---------------------------------------------

        if `secaxis' == 1 {

            graph tw line yhat_dif_`y'_`age' tavg_poly_1_GMFD, lc(red) lwidth(medthin) yaxis(1) ///
            || line yhat_dif35_`y'_`age' tavg_poly_1_GMFD, lc(red) lwidth(none) yaxis(2) ///
            ylabel(`yllab', labsize(small) axis(1)) ytitle("difference in mortality", axis(1) size(small)) ///
            ylabel(`y2lab', labsize(vsmall) axis(2)) ytitle("% of 35 degree RF", axis(2) size(small)) ///
            , title("`inctit' Income", size(medsmall)) yline(0, lcolor(gs5) lwidth(vthin)) legend(off) `xdashmin' `xdashmax' ///
            xlabel(`x_min'(`x_int')`x_max', labsize(small)) xtitle("") name(h`y'_`age') fysize(80)        
        }

        else {

            graph tw line yhat_dif_`y'_`age' tavg_poly_1_GMFD, lc(red) lwidth(medthin) ///
            ylabel(`yllab', labsize(small)) ytitle("difference in mortality", size(small)) ///
            , title("`inctit' Income", size(medsmall)) yline(0, lcolor(gs5) lwidth(vthin)) legend(off) `xdashmin' `xdashmax' ///
            xlabel(`x_min'(`x_int')`x_max', labsize(small)) xtitle("") name(h`y'_`age') fysize(80)                
        }

        di "saved graph 2"

        restore
    }

    *----------------------------------
    * Combine Plots
    *---------------------------------- 

    * label chart titles
    if `age' == 1 loc agetit "< 5" 
    if `age' == 2 loc agetit "5 - 64"
    if `age' == 3 loc agetit "> 64"

    * combine 2x1 reponse func charts
    grc1leg g1_`age' g3_`age' hist_1 hist_3, cols(2) imargin(2 2 0 0 ) title("Heterogeneity in the Mortality-Temperature Relationship, Age `agetit'", size(medsmall)) ///
    subtitle("Robustness to Omission of Precipitation Terms in Model", size(small))

    graph export "`OUTPUT'/D7_Age`age'_response_compare_noprecip.pdf", replace
    
    * combine 2x2 response dif/histogram charts
    graph combine h1_`age' h3_`age' hist_1 hist_3, cols(2) imargin(2 2 0 0) title("Heterogeneity in the Mortality-Temperature Relationship, Age `agetit'", size(medsmall)) ///
    subtitle("Robustness to Omission of Precipitation Terms in Model", size(small))

    //graph export "`OUTPUT'/Age`age'_response_compare_noprecip_dif.pdf", replace
}


cap log close


